********************************************************************************
*	PROJECT: Childhood confidence and long-term outcomes (PSID)
*	PURPOSE: Make figure 1, distributions of self-assessed and demonstrated ability 
*	PUBLISHED: August 2022
*	CONTACT: Hannah Ruebeck, hruebeck@mit.edu	
********************************************************************************


* ---- Session state -----------------------------------------------------------
* Start from an empty session; maxvar is raised only after memory is cleared.
clear all
set maxvar 10000
set more off
pause on

* ---- Graph appearance ---------------------------------------------------------
* Same scheme and typeface for on-screen graphs and EPS exports.
set scheme s1color
graph set window fontface "Times New Roman"
graph set eps fontface "Times New Roman"

* ---- Paths --------------------------------------------------------------------
* `path' is the root of the replication package; edit it for your machine.
local path "/Users/XXXXX/Downloads/replication materials"

* Input data are read from `CLEAN'; figures are exported to `OUT'.
local CLEAN "`path'/clean"
local OUT "`path'/out/figures"

/*
Description: This code makes 3 panels of figure 1:
		1. Histogram of test score percentiles
		2. Histograms of self-assessed ability
		3. Bin scatters of test scores by self-assessed ability
*/


********************************************************************************
// Programs

* bin_var: round a variable to the nearest integer bin within [min, max].
*
*   var()  variable to bin
*   min()  smallest integer bin centre
*   max()  largest integer bin centre
*
* Creates `var'_bin, set to the integer j whenever j-0.5 <= `var' < j+0.5 for
* some j in min..max. Observations outside every such interval (including
* missing values of `var') keep `var'_bin missing.
program define bin_var
    syntax, var(varname) min(int) max(int)

    generate `var'_bin = .

    * One pass per integer centre; each interval is closed below and open
    * above, so a value of exactly j+0.5 goes to bin j+1.
    forvalues centre = `min'/`max' {
        replace `var'_bin = `centre' if `var' >= `centre' - 0.5 & `var' < `centre' + 0.5
    }
end

********************************************************************************
// Parameters

* Subject and test-score type; both are spliced into variable names below.
local subj "math"
local test "ap"

* Suffix selecting the weight variable weight_confsamp`w'.
local w = 0

* Colours: `color' for bars, markers and CI caps; `color2' for the fitted line.
local color  "ebblue"
local color2 "gs8"


********************************************************************************
// Set up

* Load the analysis file from the clean-data folder.
use "`CLEAN'/CDS_TAS_PSID_analysis_1year.dta", clear

* Capitalised subject name, used in axis titles.
local Subject = proper("`subj'")

* This label is reused as the title of the self-assessment panels.
label variable c_fst_`subj'cnf_rateownskill "How good at `subj' are you?"






********************************************************************************
// Test score distributions

	* Bin the test-score percentile into 4-point-wide bins labelled by their
	* lower edge (0, 4, ..., 96). Each bin matches the closed range
	* [lo, lo+3]; values outside every such range (e.g. strictly between two
	* bins, above 99, or missing) keep a missing bin.
	* NOTE(review): this covers every value only if percentiles are integers
	* in 0-99 -- confirm against the data.
	generate bin_`subj' = .
	forvalues lo = 0(4)96 {
		replace bin_`subj' = `lo' if inrange(c_fst_chld_`subj'_`test'_pctile, `lo', `lo' + 3)
	}

	* Weighted count per bin, weighted count over all observations in memory,
	* and the bin's share of that total in percent.
	bysort bin_`subj': egen num_bin_`subj'_w`w' = total(weight_confsamp`w')
	egen total_`subj'_w`w' = total(weight_confsamp`w')
	generate pct_bin_`subj'_w`w' = 100*(num_bin_`subj'_w`w'/total_`subj'_w`w')

	* Histogram: one bar per bin, 4 units wide, y axis forced to include 0.
	twoway bar pct_bin_`subj'_w`w' bin_`subj', ///
		barwidth(4) color(`color') ///
		xtitle(`Subject' percentile rank in nationally normed sample) ///
		ytitle(Percent of sample) ylabel(#8) yscale(range(0)) ///
		plotregion(style(none)) ///
		name(`subj'_testscores_w`w', replace)

	graph export "`OUT'/fig1_hist_`subj'_testscores_w`w'.eps", replace
		
			
********************************************************************************
// Self-assessment distributions
		
	* Weighted share of the sample at each self-assessment rating.
	* `var' names the self-assessment item; it is also used by the bin-scatter
	* section that follows.
	local var rateownskill

	* Weighted count per rating value, expressed as a percent of the overall
	* weighted total total_`subj'_w`w' created in the test-score section.
	bys c_fst_`subj'cnf_`var': egen num_conf_`subj'_w`w' = total(weight_confsamp`w')
	gen pct_conf_`subj'_w`w' = 100*(num_conf_`subj'_w`w'/total_`subj'_w`w')
	
	* Axis labels for the end points and midpoint of the 1-7 scale; the
	* panel title is taken from the variable label.
	local labels `"1 "Not at all good (1)" 4 "Okay (4)" 7 "Very good (7)""'
	local title: variable label c_fst_`subj'cnf_`var'
		
	* Horizontal bar chart, one bar per rating.
	* name() carries ", replace" like the other graphs in this file, so
	* re-running this section does not fail when a graph with the same name
	* is already in memory.
	tw bar pct_conf_`subj'_w`w' c_fst_`subj'cnf_`var', ///
		horizontal barwidth(0.25) yticks(1(1)7, tlc(`color')) ///
		ylabel(`labels', angle(360) labsize(small) labcol(`color')) ///
		color(`color') xti(Percent) ytitle("") ///
		title(`title', size(medium)) name(`subj'_`var'_w`w', replace)  plotregion(style(none))
		
		graph export "`OUT'/fig1_hist_`subj'_`var'_w`w'.eps", replace
		
********************************************************************************
// Bin scatter	

		* Keep a copy of the micro data: the collapse below replaces the data
		* in memory, and the fitted line needs the individual observations.
		tempfile data
		save `data', replace

		* Weighted mean test-score percentile and its standard error for each
		* self-assessment rating, among observations with a non-missing
		* c_fst_rsd_`subj'cnf_ratesk7_w`w'.
		* The sd_ prefix is historical: the variable holds the standard error
		* of the mean (collapse statistic "semean").
		collapse (mean)   mean_`subj'_pctile = c_fst_chld_`subj'_`test'_pctile ///
				 (semean) sd_`subj'_pctile   = c_fst_chld_`subj'_`test'_pctile ///
				 [aweight = weight_confsamp`w'] ///
				 if !missing(c_fst_rsd_`subj'cnf_ratesk7_w`w'), ///
				 by(c_fst_`subj'cnf_`var')

		* Interval of +/- 1.96 standard errors around each mean.
		generate ll = mean_`subj'_pctile - (1.96*sd_`subj'_pctile)
		generate ul = mean_`subj'_pctile + (1.96*sd_`subj'_pctile)

		* Flag the collapsed rows, then stack the micro data underneath them so
		* one twoway call can draw both the binned points and the fitted line.
		generate use_scat = 1
		append using `data'

		* Capped intervals and means come from the collapsed rows
		* (use_scat == 1); the weighted linear fit uses the micro data rows.
		twoway ///
			(rcap ul ll c_fst_`subj'cnf_`var' if use_scat == 1, ///
				lcolor(`color') lwidth(thin)) ///
			(scatter mean_`subj'_pctile c_fst_`subj'cnf_`var' if use_scat == 1, ///
				color(`color')) ///
			(lfit c_fst_chld_`subj'_`test'_pctile c_fst_`subj'cnf_`var' ///
				[aweight = weight_confsamp`w'] ///
				if !missing(c_fst_rsd_`subj'cnf_ratesk7_w`w') & use_scat != 1, ///
				lcolor(`color2')) ///
			, name(bin_`subj'_`var'_ci, replace) ///
			xtitle(`title') ///
			ytitle(`Subject' pctile rank in nationally normed sample) ///
			xlabel(1 "Not at all good" 4 "Okay" 7 "Very good", labsize(small)) ///
			xtick(1(1)7) plotregion(style(none)) ///
			legend(off)

		graph export "`OUT'/fig1_binsc_`subj'_testscores_`var'_w`w'.eps", replace
			

